summaryrefslogtreecommitdiffstats
path: root/src/video_core/engines/sw_blitter/blitter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines/sw_blitter/blitter.cpp')
-rw-r--r--src/video_core/engines/sw_blitter/blitter.cpp238
1 files changed, 238 insertions, 0 deletions
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp
new file mode 100644
index 000000000..2f1ea4626
--- /dev/null
+++ b/src/video_core/engines/sw_blitter/blitter.cpp
@@ -0,0 +1,238 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "video_core/engines/sw_blitter/blitter.h"
+#include "video_core/engines/sw_blitter/converter.h"
+#include "video_core/memory_manager.h"
+#include "video_core/surface.h"
+#include "video_core/textures/decoders.h"
+
+// Forward declaration of Tegra::MemoryManager, the type of the reference taken by the
+// SoftwareBlitEngine constructor below.
+namespace Tegra {
+class MemoryManager;
+}
+
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+
+namespace Tegra::Engines::Blitter {
+
+using namespace Texture;
+
+namespace {
+
+// Number of f32 values stored per pixel in the intermediate buffers that the scaling helpers
+// below index with this stride.
+constexpr size_t ir_components = 4;
+
+/**
+ * Rescales a tightly packed, row-major image using nearest-neighbor (point) sampling.
+ *
+ * @param input      Source pixels, src_width * src_height entries of bpp bytes each.
+ * @param output     Destination pixels, dst_width * dst_height entries of bpp bytes each.
+ * @param src_width  Source width in pixels.
+ * @param src_height Source height in pixels.
+ * @param dst_width  Destination width in pixels.
+ * @param dst_height Destination height in pixels.
+ * @param bpp        Bytes per pixel, identical for input and output.
+ *
+ * Nothing is written when any dimension is zero.
+ */
+void NearestNeighbor(std::span<const u8> input, std::span<u8> output, u32 src_width, u32 src_height,
+                     u32 dst_width, u32 dst_height, size_t bpp) {
+    // An empty rectangle has nothing to sample; this also avoids dividing by zero below.
+    if (src_width == 0 || src_height == 0 || dst_width == 0 || dst_height == 0) {
+        return;
+    }
+    // Source step per destination pixel in 32.32 fixed point (relies on a 64-bit size_t).
+    const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
+    const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
+    size_t src_y = 0;
+    for (u32 y = 0; y < dst_height; y++) {
+        // Truncate the row on its own: the fractional part of src_y must not be scaled by
+        // src_width, otherwise it spills into the column index.
+        const size_t src_row = std::min<size_t>(src_y >> 32, src_height - 1);
+        size_t src_x = 0;
+        for (u32 x = 0; x < dst_width; x++) {
+            // The clamp absorbs rounding drift accumulated in the fixed-point step.
+            const size_t src_column = std::min<size_t>(src_x >> 32, src_width - 1);
+            const size_t read_from = (src_row * src_width + src_column) * bpp;
+            // Widen before multiplying so large destinations cannot wrap in 32 bits.
+            const size_t write_to = (static_cast<size_t>(y) * dst_width + x) * bpp;
+
+            std::memcpy(&output[write_to], &input[read_from], bpp);
+            src_x += dx_du;
+        }
+        src_y += dy_dv;
+    }
+}
+
+/**
+ * Rescales an intermediate-format image using nearest-neighbor (point) sampling.
+ *
+ * Both buffers are row-major with ir_components f32 values per pixel.
+ *
+ * @param input      Source pixels, src_width * src_height * ir_components floats.
+ * @param output     Destination pixels, dst_width * dst_height * ir_components floats.
+ * @param src_width  Source width in pixels.
+ * @param src_height Source height in pixels.
+ * @param dst_width  Destination width in pixels.
+ * @param dst_height Destination height in pixels.
+ *
+ * Nothing is written when any dimension is zero.
+ */
+void NearestNeighborFast(std::span<const f32> input, std::span<f32> output, u32 src_width,
+                         u32 src_height, u32 dst_width, u32 dst_height) {
+    // An empty rectangle has nothing to sample; this also avoids dividing by zero below.
+    if (src_width == 0 || src_height == 0 || dst_width == 0 || dst_height == 0) {
+        return;
+    }
+    // Source step per destination pixel in 32.32 fixed point (relies on a 64-bit size_t).
+    const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
+    const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
+    size_t src_y = 0;
+    for (u32 y = 0; y < dst_height; y++) {
+        // Truncate the row on its own: the fractional part of src_y must not be scaled by
+        // src_width, otherwise it spills into the column index.
+        const size_t src_row = std::min<size_t>(src_y >> 32, src_height - 1);
+        size_t src_x = 0;
+        for (u32 x = 0; x < dst_width; x++) {
+            // The clamp absorbs rounding drift accumulated in the fixed-point step.
+            const size_t src_column = std::min<size_t>(src_x >> 32, src_width - 1);
+            const size_t read_from = (src_row * src_width + src_column) * ir_components;
+            // Widen before multiplying so large destinations cannot wrap in 32 bits.
+            const size_t write_to = (static_cast<size_t>(y) * dst_width + x) * ir_components;
+
+            std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components);
+            src_x += dx_du;
+        }
+        src_y += dy_dv;
+    }
+}
+
+/**
+ * Rescales an intermediate-format image using bilinear filtering.
+ *
+ * Both buffers are row-major with ir_components f32 values per pixel. Destination pixel
+ * (x, y) samples the source at (x * (src_width - 1) / (dst_width - 1),
+ * y * (src_height - 1) / (dst_height - 1)), so the corner pixels of both images coincide.
+ * A destination axis of length one samples coordinate zero on that axis.
+ *
+ * @param input      Source pixels, src_width * src_height * ir_components floats.
+ * @param output     Destination pixels, dst_width * dst_height * ir_components floats.
+ * @param src_width  Source width in pixels.
+ * @param src_height Source height in pixels.
+ * @param dst_width  Destination width in pixels.
+ * @param dst_height Destination height in pixels.
+ *
+ * Nothing is written when the source is empty.
+ */
+void Bilinear(std::span<const f32> input, std::span<f32> output, size_t src_width,
+              size_t src_height, size_t dst_width, size_t dst_height) {
+    // Without source texels there is nothing to filter, and src_width - 1 would wrap.
+    if (src_width == 0 || src_height == 0) {
+        return;
+    }
+    // Blends the four neighbouring texels, first along x and then along y.
+    const auto bilinear_sample = [](std::span<const f32> x0_y0, std::span<const f32> x1_y0,
+                                    std::span<const f32> x0_y1, std::span<const f32> x1_y1,
+                                    f32 weight_x, f32 weight_y) {
+        std::array<f32, ir_components> result{};
+        for (size_t i = 0; i < ir_components; i++) {
+            const f32 a = std::lerp(x0_y0[i], x1_y0[i], weight_x);
+            const f32 b = std::lerp(x0_y1[i], x1_y1[i], weight_x);
+            result[i] = std::lerp(a, b, weight_y);
+        }
+        return result;
+    };
+    // Returns the texel at integer-valued source coordinates (in_x, in_y). The buffer is
+    // row-major, so the row index is the one scaled by the source width.
+    const auto read_src = [&](f32 in_x, f32 in_y) {
+        const size_t read_from =
+            (static_cast<size_t>(in_y) * src_width + static_cast<size_t>(in_x)) * ir_components;
+        return input.subspan(read_from, ir_components);
+    };
+
+    const f32 max_x = static_cast<f32>(src_width - 1);
+    const f32 max_y = static_cast<f32>(src_height - 1);
+    const f32 dx_du = dst_width > 1 ? max_x / static_cast<f32>(dst_width - 1) : 0.f;
+    const f32 dy_dv = dst_height > 1 ? max_y / static_cast<f32>(dst_height - 1) : 0.f;
+
+    for (size_t y = 0; y < dst_height; y++) {
+        // Clamp so floating-point rounding can never step past the last source row.
+        const f32 src_y = std::min(static_cast<f32>(y) * dy_dv, max_y);
+        const f32 y_low = std::floor(src_y);
+        const f32 y_high = std::ceil(src_y);
+        const f32 weight_y = src_y - y_low;
+        for (size_t x = 0; x < dst_width; x++) {
+            // Same clamp for the last source column.
+            const f32 src_x = std::min(static_cast<f32>(x) * dx_du, max_x);
+            const f32 x_low = std::floor(src_x);
+            const f32 x_high = std::ceil(src_x);
+            const f32 weight_x = src_x - x_low;
+
+            const auto result =
+                bilinear_sample(read_src(x_low, y_low), read_src(x_high, y_low),
+                                read_src(x_low, y_high), read_src(x_high, y_high), weight_x,
+                                weight_y);
+
+            const size_t write_to = (y * dst_width + x) * ir_components;
+
+            std::memcpy(&output[write_to], result.data(), sizeof(f32) * ir_components);
+        }
+    }
+}
+
+} // namespace
+
+/// Scratch storage owned by SoftwareBlitEngine. The buffers are members so that each call to
+/// Blit resizes them in place instead of creating new vectors.
+struct SoftwareBlitEngine::BlitEngineImpl {
+    /// Raw bytes of a whole surface as read from the memory manager (source first, then
+    /// destination).
+    std::vector<u8> tmp_buffer{};
+    /// Tightly packed pixels of the source rectangle.
+    std::vector<u8> src_buffer{};
+    /// Tightly packed pixels of the destination rectangle.
+    std::vector<u8> dst_buffer{};
+    /// Source rectangle expanded to ir_components floats per pixel.
+    std::vector<f32> intermediate_src{};
+    /// Destination rectangle in the same float representation.
+    std::vector<f32> intermediate_dst{};
+    /// Supplies the converters between render target formats and the float representation.
+    ConverterFactory converter_factory{};
+};
+
+/// Binds the engine to the given memory manager and allocates its private scratch state.
+/// @param memory_manager_ Memory manager that Blit reads surfaces from and writes them back to.
+SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_)
+ : memory_manager{memory_manager_} {
+ impl = std::make_unique<BlitEngineImpl>();
+}
+
+// NOTE(review): presumably defaulted here rather than in the header because impl points to
+// BlitEngineImpl, which is only a complete type in this file - confirm against the header.
+SoftwareBlitEngine::~SoftwareBlitEngine() = default;
+
+/**
+ * Copies a rectangle from src to dst in software, rescaling and converting the pixel format
+ * when the two rectangles or formats differ.
+ *
+ * Steps:
+ *  1. Read the whole source surface and extract the source rectangle into a packed buffer.
+ *  2. Produce the packed destination rectangle: a plain buffer swap when format and extents
+ *     match, a byte-wise nearest-neighbor rescale when only the extents differ and the filter
+ *     is not bilinear, and otherwise a conversion through the float intermediate format.
+ *  3. Read the whole destination surface, insert the rectangle, and write the surface back.
+ *
+ * @param src    Source surface description.
+ * @param dst    Destination surface description.
+ * @param config Rectangle coordinates (src_x0..src_y1, dst_x0..dst_y1) and filter mode.
+ * @return Always true.
+ */
+bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
+                              Fermi2D::Config& config) {
+    // Size in bytes of the whole surface as it is laid out in memory.
+    const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) {
+        if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) {
+            return CalculateSize(true, bytes_per_pixel, surface.width, surface.height,
+                                 surface.depth, surface.block_height, surface.block_depth);
+        }
+        // Widen before multiplying so large surfaces cannot wrap in 32 bits.
+        return static_cast<size_t>(surface.pitch) * surface.height;
+    };
+    // Copies an extent_x by extent_y rectangle between a pitch-linear surface and a tightly
+    // packed buffer. With unpack == false, input is the surface and output the packed buffer;
+    // with unpack == true the roles are swapped. (x0, y0) is the rectangle's origin on the
+    // surface.
+    const auto process_pitch_linear = [](bool unpack, std::span<const u8> input,
+                                         std::span<u8> output, u32 extent_x, u32 extent_y,
+                                         u32 pitch, u32 x0, u32 y0, size_t bpp) {
+        const size_t base_offset = x0 * bpp;
+        const size_t copy_size = extent_x * bpp;
+        for (size_t y = 0; y < extent_y; y++) {
+            // Only the surface row is displaced by y0; the packed buffer always starts at
+            // its own row zero.
+            const size_t surface_offset = (y + y0) * pitch + base_offset;
+            const size_t packed_offset = y * copy_size;
+            const size_t write_offset = unpack ? surface_offset : packed_offset;
+            const size_t read_offset = unpack ? packed_offset : surface_offset;
+            // Stop rather than touch memory outside either buffer when the rectangle does
+            // not fit the surface.
+            if (write_offset + copy_size > output.size() ||
+                read_offset + copy_size > input.size()) {
+                break;
+            }
+            std::memcpy(output.data() + write_offset, input.data() + read_offset, copy_size);
+        }
+    };
+
+    const u32 src_extent_x = config.src_x1 - config.src_x0;
+    const u32 src_extent_y = config.src_y1 - config.src_y0;
+
+    const u32 dst_extent_x = config.dst_x1 - config.dst_x0;
+    const u32 dst_extent_y = config.dst_y1 - config.dst_y0;
+    const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
+    const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
+
+    // Fetch the whole source surface.
+    const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
+    impl->tmp_buffer.resize(src_size);
+    memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);
+
+    // Byte sizes of the packed rectangles, widened before multiplying.
+    const size_t src_copy_size =
+        static_cast<size_t>(src_extent_x) * src_extent_y * src_bytes_per_pixel;
+    const size_t dst_copy_size =
+        static_cast<size_t>(dst_extent_x) * dst_extent_y * dst_bytes_per_pixel;
+
+    impl->src_buffer.resize(src_copy_size);
+
+    const bool no_passthrough =
+        src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y;
+
+    // Same format on both sides: rescale the raw bytes directly.
+    const auto conversion_phase_same_format = [&]() {
+        NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y,
+                        dst_extent_x, dst_extent_y, dst_bytes_per_pixel);
+    };
+
+    // Different formats or bilinear filtering: go through the float intermediate format.
+    const auto conversion_phase_ir = [&]() {
+        auto* input_converter = impl->converter_factory.GetFormatConverter(src.format);
+        impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components);
+        impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components);
+        input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src);
+
+        if (config.filter != Fermi2D::Filter::Bilinear) {
+            NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x,
+                                src_extent_y, dst_extent_x, dst_extent_y);
+        } else {
+            Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y,
+                     dst_extent_x, dst_extent_y);
+        }
+
+        auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format);
+        output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer);
+    };
+
+    // Do the actual blit: extract the source rectangle into the packed source buffer.
+
+    impl->dst_buffer.resize(dst_copy_size);
+    if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
+        UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
+                         src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
+                         src_extent_y, src.block_height, src.block_depth,
+                         src_extent_x * src_bytes_per_pixel);
+    } else {
+        process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
+                             src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
+    }
+
+    // Conversion phase
+    if (no_passthrough) {
+        if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) {
+            conversion_phase_ir();
+        } else {
+            conversion_phase_same_format();
+        }
+    } else {
+        // Identical format and extents: the packed source already is the result.
+        impl->dst_buffer.swap(impl->src_buffer);
+    }
+
+    // Read the destination surface first so pixels outside the rectangle are preserved.
+    const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
+    impl->tmp_buffer.resize(dst_size);
+    memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
+
+    if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
+        SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
+                       dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
+                       dst_extent_y, dst.block_height, dst.block_depth,
+                       dst_extent_x * dst_bytes_per_pixel);
+    } else {
+        process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
+                             dst.pitch, config.dst_x0, config.dst_y0,
+                             static_cast<size_t>(dst_bytes_per_pixel));
+    }
+    memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
+    return true;
+}
+
+} // namespace Tegra::Engines::Blitter